* Start from a clean session: remove any data and other objects held in memory
clear all
* Let output scroll without stopping at --more-- prompts
set more off
* Raise the limit on the number of variables; issued right after the clear,
* while no dataset is loaded
set maxvar 10000


/*Note: This do-file creates figures 5 & 6.*/

*******************************************************
* Black-White Catch Up Between Early and Late Cohorts *
*******************************************************

*------------------*
* RANK-RANK 
*------------------*

    foreach gend in men women {

        * Per-gender settings: the sex code to keep and the exported file name
        * (sex==1 is kept for men, sex==2 for women)
        if "`gend'"=="men" {
            local sexcode 1
            local figname "figure5_rank"
        }
        if "`gend'"=="women" {
            local sexcode 2
            local figname "figure6_rank"
        }

        use "$Mydirectory1/3_Output/2_PooledData_analysis.dta", clear

        * Restrict to the chosen sex and to the baseline sample
        keep if sex==`sexcode'
        keep if baseline_sample==1

        * Blank out the variable labels of the two rank measures
        label variable rank_son_baseline " "
        label variable rank_father_baseline " "

        * Cohort indicators: decades 1910-1920 (early) and 1940-1950 (late);
        * everything outside these two windows is dropped
        generate early = inrange(decade, 1910, 1920)
        generate late = inrange(decade, 1940, 1950)
        keep if early | late

        * Containers for the decade-adjusted respondent and parental ranks
        generate yhat_fam = .
        generate yhat_father = .

        * Within each period-by-race cell: regress the rank on decade dummies,
        * take the residual, and add back the weighted cell mean
        foreach per in early late {
            forvalues grp = 1/2 {

                local cell "`per'==1 & race==`grp'"

                * Respondent rank
                quietly regress rank_son_baseline i.decade if `cell' [aw=wgt_sex_race]
                predict yhat_fam`per'`grp' if `cell', residuals
                summarize rank_son_baseline if `cell' [aw=wgt_sex_race]
                replace yhat_fam = yhat_fam`per'`grp' + `r(mean)' if `cell'

                * Parental rank
                quietly regress rank_father_baseline i.decade if `cell' [aw=wgt_sex_race]
                predict yhat_father`per'`grp' if `cell', residuals
                summarize rank_father_baseline if `cell' [aw=wgt_sex_race]
                replace yhat_father = yhat_father`per'`grp' + `r(mean)' if `cell'
            }
        }

        * From here on the rank variables hold the adjusted values
        replace rank_son_baseline = yhat_fam
        replace rank_father_baseline = yhat_father

        * Keep the adjusted micro data for the fitted-line regressions below
        tempfile adjusted
        save `adjusted'

        * Binned scatter (20 quantiles) for every race-by-period cell;
        * the bin points are written to files in the working directory
        forvalues grp = 1/2 {
            foreach per in early late {
                local cell "`per'==1 & race==`grp'"
                binscatter rank_son_baseline rank_father_baseline if `cell' [aw=wgt_sex_race], nq(20) savedata("binscatter_r`grp'_`per'") replace
            }
        }

        * Read each saved set of bin points back in, tag it with its cell,
        * and store it as a Stata dataset
        forvalues grp = 1/2 {
            foreach per in early late {
                insheet using "binscatter_r`grp'_`per'.csv", clear
                generate race = `grp'
                generate period = "`per'"
                save "binscatter_r`grp'_`per'.dta", replace
            }
        }

        * Stack the four sets of bin points (same order as they are plotted)
        use "binscatter_r1_early", clear
        foreach piece in r2_early r1_late r2_late {
            append using "binscatter_`piece'"
        }

        * Fitted lines come from regressions on the adjusted micro data,
        * not on the bin points; coefficients are read after restore
        generate slope = .
        generate intercept = .

        forvalues grp = 1/2 {
            foreach per in early late {
                preserve
                use `adjusted', clear
                quietly regress rank_son_baseline rank_father_baseline if `per'==1 & race==`grp' [aw=wgt_sex_race]
                restore

                replace slope = _b[rank_father_baseline] if period=="`per'" & race==`grp'
                replace intercept = _b[_cons] if period=="`per'" & race==`grp'
            }
        }

        * Value of the fitted line at each bin point
        generate yhat = intercept + (slope*rank_father_baseline)

        local yvar "rank_son_baseline"
        local xvar "rank_father_baseline"

        * Figure: bin points and fitted line for each race-by-period cell
        twoway ///
            (scatter `yvar' `xvar' if race==1 & period=="early", msymbol(oh) mcolor(midblue)) ///
            (line yhat `xvar' if race==1 & period=="early", lpattern(dash) lcolor(midblue)) ///
            (scatter `yvar' `xvar' if race==2 & period=="early", msymbol(+) mcolor(orange_red*0.8)) ///
            (line yhat `xvar' if race==2 & period=="early", lpattern(dash) lcolor(orange_red*0.8)) ///
            (scatter `yvar' `xvar' if race==1 & period=="late", msymbol(dh) mcolor(navy)) ///
            (line yhat `xvar' if race==1 & period=="late", lpattern(dash) lcolor(navy)) ///
            (scatter `yvar' `xvar' if race==2 & period=="late", msymbol(t) mcolor(cranberry)) ///
            (line yhat `xvar' if race==2 & period=="late", lpattern(dash) lcolor(cranberry)), ///
            legend(on order(1 3 5 7) label(1 "White, 1910-1929") label(3 "Black, 1910-1929") ///
                label(5 "White, 1940-1959") label(7 "Black, 1940-1959") ring(0) position(5)) ///
            ylabel(15(10)65, axis(1)) yscale(range(20 65)) ///
            xtitle(" " "Predicted parental rank") ytitle("Respondent rank" " ")
        graph export "$Mydirectory2/main_figures_tables/`figname'.pdf", as(pdf) replace

        * Remove the intermediate binscatter files; errors for files that
        * do not exist are ignored
        forvalues grp = 1/2 {
            foreach per in early late {
                foreach ext in csv do dta csv.do {
                    capture rm "binscatter_r`grp'_`per'.`ext'"
                }
            }
        }

    }

*-----------------------------------------------------------------------------*
*-----------------------------------------------------------------------------*

*------------------*
* IGE
*------------------*

    foreach gend in men women {

        * Per-gender settings: the sex code to keep and the exported file name
        * (sex==1 is kept for men, sex==2 for women)
        if "`gend'"=="men" {
            local sexcode 1
            local figname "figure5_ige"
        }
        if "`gend'"=="women" {
            local sexcode 2
            local figname "figure6_ige"
        }

        use "$Mydirectory1/3_Output/2_PooledData_analysis.dta", clear

        * Restrict to the baseline sample and to the chosen sex
        keep if baseline_sample==1
        keep if sex==`sexcode'

        * Cohort indicators: decades 1910-1920 (early) and 1940-1950 (late);
        * everything outside these two windows is dropped
        generate early = inrange(decade, 1910, 1920)
        generate late = inrange(decade, 1940, 1950)
        keep if early | late

        * Containers for the decade-adjusted respondent and parental log incomes
        generate yhat_fam = .
        generate yhat_father = .

        * Within each period-by-race cell: regress the log income on decade
        * dummies, take the residual, and add back the weighted cell mean
        foreach per in early late {
            forvalues grp = 1/2 {

                local cell "`per'==1 & race==`grp'"

                * Respondent log income
                quietly regress log_son_baseline i.decade if `cell' [aw=wgt_sex_race]
                predict yhat_fam`per'`grp' if `cell', residuals
                summarize log_son_baseline if `cell' [aw=wgt_sex_race]
                replace yhat_fam = yhat_fam`per'`grp' + `r(mean)' if `cell'

                * Parental log income
                quietly regress log_father_baseline i.decade if `cell' [aw=wgt_sex_race]
                predict yhat_father`per'`grp' if `cell', residuals
                summarize log_father_baseline if `cell' [aw=wgt_sex_race]
                replace yhat_father = yhat_father`per'`grp' + `r(mean)' if `cell'
            }
        }

        * From here on the log-income variables hold the adjusted values
        replace log_son_baseline = yhat_fam
        replace log_father_baseline = yhat_father

        * Keep the adjusted micro data for the fitted-line regressions below
        tempfile adjusted
        save `adjusted'

        * Binned scatter (20 quantiles) for every period-by-race cell;
        * the bin points are written to files in the working directory
        foreach per in early late {
            forvalues grp = 1/2 {
                local cell "`per'==1 & race==`grp'"
                binscatter log_son_baseline log_father_baseline if `cell' [aw=wgt_sex_race], nq(20) savedata("binscatter_r`grp'_`per'") replace
            }
        }

        * Read each saved set of bin points back in, tag it with its cell,
        * and store it as a Stata dataset
        forvalues grp = 1/2 {
            foreach per in early late {
                insheet using "binscatter_r`grp'_`per'.csv", clear
                generate race = `grp'
                generate period = "`per'"
                save "binscatter_r`grp'_`per'.dta", replace
            }
        }

        * Stack the four sets of bin points (same order as they are plotted)
        use "binscatter_r1_early", clear
        foreach piece in r2_early r1_late r2_late {
            append using "binscatter_`piece'"
        }

        * Fitted lines come from regressions on the adjusted micro data,
        * not on the bin points; coefficients are read after restore
        generate slope = .
        generate intercept = .

        foreach per in early late {
            foreach grp in 1 2 {
                preserve
                    use `adjusted', clear
                    keep if race==`grp' & `per'==1

                    regress log_son_baseline log_father_baseline [aw=wgt_sex_race]

                restore

                replace slope = _b[log_father_baseline] if race==`grp' & period=="`per'"
                replace intercept = _b[_cons] if race==`grp' & period=="`per'"
            }
        }

        * Value of the fitted line at each bin point
        generate yhat = intercept + (slope*log_father_baseline)

        local yvar "log_son_baseline"
        local xvar "log_father_baseline"

        * Figure: bin points and fitted line for each race-by-period cell
        twoway ///
            (scatter `yvar' `xvar' if race==1 & period=="early", msymbol(oh) mcolor(midblue)) ///
            (line yhat `xvar' if race==1 & period=="early", lpattern(dash) lcolor(midblue)) ///
            (scatter `yvar' `xvar' if race==2 & period=="early", msymbol(+) mcolor(orange_red*0.8)) ///
            (line yhat `xvar' if race==2 & period=="early", lpattern(dash) lcolor(orange_red*0.8)) ///
            (scatter `yvar' `xvar' if race==1 & period=="late", msymbol(dh) mcolor(navy)) ///
            (line yhat `xvar' if race==1 & period=="late", lpattern(dash) lcolor(navy)) ///
            (scatter `yvar' `xvar' if race==2 & period=="late", msymbol(t) mcolor(cranberry)) ///
            (line yhat `xvar' if race==2 & period=="late", lpattern(dash) lcolor(cranberry)), ///
            legend(on order(1 3 5 7) label(1 "White, 1910-1929") label(3 "Black, 1910-1929") ///
                label(5 "White, 1940-1959") label(7 "Black, 1940-1959") ring(0) position(5)) ///
            ylabel(7.5(0.5)9, axis(1)) yscale(range(7.5 9.25)) ///
            xtitle(" " "Predicted parental logged income") ytitle("Respondent logged income" " ") ///
            xlabel(6(0.5)9.5, axis(1)) xscale(range(6 9.5))
        graph export "$Mydirectory2/main_figures_tables/`figname'.pdf", as(pdf) replace

        * Remove the intermediate binscatter files; errors for files that
        * do not exist are ignored
        forvalues grp = 1/2 {
            foreach per in early late {
                foreach ext in csv do dta csv.do {
                    capture rm "binscatter_r`grp'_`per'.`ext'"
                }
            }
        }

    }